options(digits = 4, width = 132)

library(here)
library(tidyverse)
library(RItools)

source(here::here("Design", "nonbimatchingfunctions.R"))

load(here::here("Design", "matches_anyDA_Diversity_new.rda"), verbose = TRUE)
load(here::here("Data", "wrkdatOwnMap_anyDA_new.rda"), verbose = TRUE)

wdat0 <- wrkdatOwnMap_new %>%
  filter(!is.na(vm_change) & !is.na(social.capital01) &
    !is.na(community.resp01) & !is.na(da_diversity_index)) %>%
  droplevels()

wdat0 <- left_join(wdat0, matches_anyDA_Diversity_new)
table(is.na(wdat0$pair))

## Easier to think about "higher perceiver" versus "lower perceiver" when it comes to asking whether the matching worked.
## Further simplify the working file
wdat1 <- wdat0 %>%
  filter(!is.na(pair)) %>%
  group_by(pair) %>%
  mutate(perc_more = rank(subjcomm.diversity.index) - 1) %>%
  ungroup() %>%
  mutate(pairF = factor(pair)) %>%
  select(
    perc_more, da_prop_vm_20pct_06, vm_change, pair, pairF, social.capital01, community.resp01,
    vm.community.norm2, vm.community.subj, vcid, dauid, csd_pop_06, csd_prop_vm_20pct_06,
    da_pop_dens_06, csd_urban_06, csd_pop_dens_06, community_area_km, da_diversity_index
  ) %>%
  droplevels()

table(wdat1$perc_more)

xb1_ranked0 <- xBalance(perc_more ~ da_diversity_index + vm_change,
  strata = list(unstr = NULL, pair = ~pair), report = "all", data = wdat1
)
xb1_ranked0$overall
xb1_ranked0$results

xb1_ranked <- balanceTest(perc_more ~ da_diversity_index + vm_change + strata(pair), data = wdat1, p.adjust = "none")
xb1_ranked$overall
xb1_ranked$results[, c("Control", "Treatment", "adj.diff", "std.diff", "p"), ] # , "pair"]


## An alternative for the overall test which should be fairly close to it
library(formula.tools)
library(coin)
coin_fmla <- da_diversity_index + vm_change ~  vm.community.subj | pairF
coin_test <- independence_test(coin_fmla, data = wdat1, teststat = "quadratic")
coin_test_perm <- independence_test(coin_fmla, data = wdat1, teststat = "quadratic", distribution = approximate(nresample = 1000))
coin::pvalue(coin_test)
coin::pvalue(coin_test_perm)

coin_fmla_rank <- da_diversity_index + vm_change ~ perc_more | pairF
coin_test_rank <- independence_test(coin_fmla_rank, data = wdat1, teststat = "quadratic")
coin_test_rank_perm <- independence_test(coin_fmla_rank,
  data = wdat1,
  teststat = "quadratic", distribution = approximate(nresample = 1000)
)
coin::pvalue(coin_test_rank)
coin::pvalue(coin_test_rank_perm)

pairdiffs <- wdat1 %>%
  group_by(pair) %>%
  summarize(
    da_diversity_index = abs(diff(da_diversity_index)),
    da_prop_vm_20pct_06 = abs(diff(da_prop_vm_20pct_06)),
    vm_change = abs(diff(vm_change)),
    vm.community.subj = abs(diff(vm.community.subj)),
    csd_pop_06 = abs(diff(csd_pop_06)),
    csd_pop_dens_06 = abs(diff(csd_pop_dens_06)),
    csd_prop_vm_20pct_06 = abs(diff(csd_prop_vm_20pct_06)),
    # da_pop_dens_06 = abs(diff(da_pop_dens_06)),
    community.area.km = abs(diff(community_area_km))
  )

sapply(pairdiffs, summary)
#         pair da_diversity_index da_prop_vm_20pct_06 vm_change vm.community.subj csd_pop_06 csd_pop_dens_06
# Min.       1          0.000e+00            0.000000 0.0000000            0.0000          0           0.000
# 1st Qu.  542          0.000e+00            0.000000 0.0008856            0.0700       3066           9.929
# Median  1083          0.000e+00            0.006342 0.0092457            0.1600      49462         118.742
# Mean    1083          8.057e-06            0.016882 0.0098756            0.2393     325599         494.525
# 3rd Qu. 1624          8.903e-06            0.021930 0.0172610            0.3300     372105         390.514
# Max.    2165          8.559e-05            0.305507 0.0250000            4.1900    2501992        4576.619
#         csd_prop_vm_20pct_06 community.area.km
# Min.                0.000000         1.618e-05
# 1st Qu.             0.003592         9.829e+00
# Median              0.023915         7.486e+01
# Mean                0.069441         8.359e+02
# 3rd Qu.             0.098402         4.069e+02
# Max.                0.554212         3.653e+04

sapply(pairdiffs, function(x) {
  quantile(x, sort(unique(c(seq(0, 1, .1), seq(.9, 1, .01)))), na.rm = TRUE)
})
#        pair da_diversity_index da_prop_vm_20pct_06 vm_change vm.community.subj csd_pop_06 csd_pop_dens_06
# 0%      1.0          0.000e+00           0.000e+00  0.000000            0.0000        0.0           0.000
# 10%   217.4          0.000e+00           0.000e+00  0.000000            0.0200        0.0           0.000
# 20%   433.8          0.000e+00           0.000e+00  0.000000            0.0500      834.2           2.591
# 30%   650.2          0.000e+00           0.000e+00  0.002376            0.0800     6026.0          19.654
# 40%   866.6          0.000e+00           6.576e-05  0.005779            0.1200    14470.6          51.768
# 50%  1083.0          0.000e+00           6.342e-03  0.009246            0.1600    49462.0         118.742
# 60%  1299.4          2.233e-06           1.243e-02  0.012363            0.2100    96921.0         214.630
# 70%  1515.8          6.865e-06           1.942e-02  0.015790            0.2800   271593.0         308.522
# 80%  1732.2          1.275e-05           2.703e-02  0.018868            0.3800   550721.0         546.157
# 90%  1948.6          2.810e-05           4.821e-02  0.021830            0.5400   902579.0        2313.991
# 91%  1970.2          3.040e-05           5.221e-02  0.022222            0.5724   970285.0        2612.742
# 92%  1991.9          3.368e-05           5.436e-02  0.022586            0.6000  1381436.0        2622.167
# 93%  2013.5          3.691e-05           5.782e-02  0.022896            0.6500  1515088.0        2757.185
# 94%  2035.2          3.896e-05           6.140e-02  0.023265            0.6900  1772909.0        2904.356
# 95%  2056.8          4.216e-05           6.994e-02  0.023529            0.7300  1869830.0        2933.122
# 96%  2078.4          4.776e-05           7.833e-02  0.023782            0.8100  2150886.0        3109.874
# 97%  2100.1          5.430e-05           8.862e-02  0.024162            0.8700  2338866.0        3349.652
# 98%  2121.7          5.931e-05           9.940e-02  0.024465            0.9800  2403221.0        3507.443
# 99%  2143.4          6.651e-05           1.317e-01  0.024709            1.1736  2436495.2        3771.346
# 100% 2165.0          8.559e-05           3.055e-01  0.025000            4.1900  2501992.0        4576.619
#      csd_prop_vm_20pct_06 community.area.km
# 0%               0.000000         1.618e-05
# 10%              0.000000         1.836e+00
# 20%              0.001314         6.283e+00
# 30%              0.006295         1.466e+01
# 40%              0.013039         3.306e+01
# 50%              0.023915         7.486e+01
# 60%              0.039835         1.410e+02
# 70%              0.069195         2.754e+02
# 80%              0.127043         6.502e+02
# 90%              0.202444         1.640e+03
# 91%              0.214298         1.840e+03
# 92%              0.232126         2.038e+03
# 93%              0.240701         2.428e+03
# 94%              0.289123         2.778e+03
# 95%              0.306705         3.349e+03
# 96%              0.328076         4.321e+03
# 97%              0.348050         5.992e+03
# 98%              0.389048         9.398e+03
# 99%              0.410908         1.602e+04
# 100%             0.554212         3.653e+04

## Range of the index among those matched
summary(wdat1$da_diversity_index)
#     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
# 0.000000 0.000000 0.000566 0.005653 0.003544 0.330917

# number of people in final design
nrow(wdat1)
# [1] 4330
# number of DAs
length(unique(wdat1$dauid))
# [1] 3599

system("touch Design/match_assess_anyDA_Diversity_new.done")
